Read data
# Load the transcript-level table from the 2024-04-18 snapshot.
# paste_wd() is a project helper defined elsewhere; presumably it prepends the
# project working directory to the relative path -- TODO confirm.
espresso_deseq2_genetype2_isDET <- read_tsv(
  paste_wd('Tables/Espresso/espresso_deseq2_genetype2_isDET_2024-04-18.tsv')
)
## Rows: 36717 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (11): transcript_id, transcript_type, transcript_name, gene_id, gene_typ...
## dbl (18): siMETTL2A_baseMean, siMETTL2A_log2FoldChange, siMETTL2A_lfcSE, siM...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the table that was just loaded.
espresso_deseq2_genetype2_isDET
## # A tibble: 36,717 × 29
## transcript_id transcript_type transcript_name gene_id gene_type gene_name
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ENST00000498442.1 retained_intron CRBN-212 ENSG00… protein_… CRBN
## 2 ENST00000459840.5 retained_intron CRBN-205 ENSG00… protein_… CRBN
## 3 ENST00000231948.9 protein_coding CRBN-201 ENSG00… protein_… CRBN
## 4 ENST00000432408.6 protein_coding CRBN-203 ENSG00… protein_… CRBN
## 5 ENST00000339437.… protein_coding TRNT1-203 ENSG00… protein_… TRNT1
## 6 ENST00000488263.5 retained_intron CRBN-209 ENSG00… protein_… CRBN
## 7 ENST00000420393.5 protein_coding TRNT1-207 ENSG00… protein_… TRNT1
## 8 ENST00000698415.1 retained_intron TRNT1-230 ENSG00… protein_… TRNT1
## 9 ENST00000450014.1 protein_coding CRBN-204 ENSG00… protein_… CRBN
## 10 ENST00000698416.1 retained_intron TRNT1-231 ENSG00… protein_… TRNT1
## # ℹ 36,707 more rows
## # ℹ 23 more variables: siMETTL2A_baseMean <dbl>,
## # siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## # siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## # siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
# Load the gzipped per-position results table from the 2024-04-24 snapshot.
# paste_wd() is a project helper defined elsewhere; presumably it prepends the
# project working directory to the relative path -- TODO confirm.
sampcomp_results_joined <- read_tsv(
  paste_wd('Tables/DRS_m3C_sites/sampcomp_results_joined_2024-04-24.tsv.gz')
)
## Rows: 5884004 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (34): transcript_id, transcript_name, ref_kmer, GMM_cov_type_G, cluster_...
## dbl (33): position, GMM_logit_pvalue_G, KS_dwell_pvalue_G, KS_intensity_pval...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the per-position table that was just loaded.
sampcomp_results_joined
## # A tibble: 5,884,004 × 67
## transcript_id transcript_name position ref_kmer GMM_logit_pvalue_G
## <chr> <chr> <dbl> <chr> <dbl>
## 1 ENST00000264926.7 RAD18-201 1464 TCACA NA
## 2 ENST00000264926.7 RAD18-201 1465 CACAT 1
## 3 ENST00000264926.7 RAD18-201 1466 ACATA NA
## 4 ENST00000264926.7 RAD18-201 1467 CATAA 1
## 5 ENST00000264926.7 RAD18-201 1468 ATAAA NA
## 6 ENST00000264926.7 RAD18-201 1473 AACGA 1
## 7 ENST00000264926.7 RAD18-201 1475 CGATC NA
## 8 ENST00000264926.7 RAD18-201 1486 ACACA NA
## 9 ENST00000264926.7 RAD18-201 1501 CAAGA 1
## 10 ENST00000264926.7 RAD18-201 1502 AAGAC NA
## # ℹ 5,883,994 more rows
## # ℹ 62 more variables: KS_dwell_pvalue_G <dbl>, KS_intensity_pvalue_G <dbl>,
## # GMM_cov_type_G <chr>, GMM_n_clust_G <dbl>, cluster_counts_G <chr>,
## # Logit_LOR_G <chr>, c1_mean_intensity_G <dbl>, c2_mean_intensity_G <dbl>,
## # c1_median_intensity_G <dbl>, c2_median_intensity_G <dbl>,
## # c1_sd_intensity_G <dbl>, c2_sd_intensity_G <dbl>, c1_mean_dwell_G <dbl>,
## # c2_mean_dwell_G <dbl>, c1_median_dwell_G <dbl>, c2_median_dwell_G <dbl>, …
# List every column of the per-position table.
names(sampcomp_results_joined)
## [1] "transcript_id" "transcript_name" "position"
## [4] "ref_kmer" "GMM_logit_pvalue_G" "KS_dwell_pvalue_G"
## [7] "KS_intensity_pvalue_G" "GMM_cov_type_G" "GMM_n_clust_G"
## [10] "cluster_counts_G" "Logit_LOR_G" "c1_mean_intensity_G"
## [13] "c2_mean_intensity_G" "c1_median_intensity_G" "c2_median_intensity_G"
## [16] "c1_sd_intensity_G" "c2_sd_intensity_G" "c1_mean_dwell_G"
## [19] "c2_mean_dwell_G" "c1_median_dwell_G" "c2_median_dwell_G"
## [22] "c1_sd_dwell_G" "c2_sd_dwell_G" "intensity_up_G"
## [25] "intensity_down_G" "dwell_up_G" "dwell_down_G"
## [28] "GMM_change_G" "GMM_logit_pvalue_I" "KS_dwell_pvalue_I"
## [31] "KS_intensity_pvalue_I" "GMM_cov_type_I" "GMM_n_clust_I"
## [34] "cluster_counts_I" "Logit_LOR_I" "c1_mean_intensity_I"
## [37] "c2_mean_intensity_I" "c1_median_intensity_I" "c2_median_intensity_I"
## [40] "c1_sd_intensity_I" "c2_sd_intensity_I" "c1_mean_dwell_I"
## [43] "c2_mean_dwell_I" "c1_median_dwell_I" "c2_median_dwell_I"
## [46] "c1_sd_dwell_I" "c2_sd_dwell_I" "intensity_up_I"
## [49] "intensity_down_I" "dwell_up_I" "dwell_down_I"
## [52] "GMM_change_I" "intensity_up" "intensity_down"
## [55] "dwell_up" "dwell_down" "GMM_change"
## [58] "middle_base" "middle_isC" "have_CC_middle"
## [61] "have_C3_middle" "middleC_info" "transcript_type"
## [64] "gene_id" "gene_type" "gene_name"
## [67] "seqname"
Compare among groups
# Collapse the per-position intensity calls to one row per transcript and
# classify each transcript by whether any of its positions is labelled
# 'common' in the up and/or down direction.
sampcomp_results_intensity_group <-
  sampcomp_results_joined |>
  select(
    transcript_id:ref_kmer,
    intensity_up, intensity_down
  ) |>
  group_by(transcript_id, transcript_name) |>
  summarise(
    # Comma-separated set of the distinct labels seen on this transcript.
    intensity_up_group = paste(unique(intensity_up), collapse = ','),
    intensity_down_group = paste(unique(intensity_down), collapse = ','),
    .groups = 'drop'
  ) |>
  mutate(
    # case_when() takes the first matching branch, so the second and third
    # branches are only reached when the other direction has no 'common' site.
    intensity_group = case_when(
      grepl('common', intensity_up_group) &
        grepl('common', intensity_down_group) ~ 'both up and down',
      grepl('common', intensity_up_group) ~ 'up',
      grepl('common', intensity_down_group) ~ 'down',
      .default = 'others'
    )
  )
sampcomp_results_intensity_group
## # A tibble: 5,297 × 5
## transcript_id transcript_name intensity_up_group intensity_down_group
## <chr> <chr> <chr> <chr>
## 1 ENST00000000233.10 ARF5-201 others others
## 2 ENST00000000412.8 M6PR-201 others others
## 3 ENST00000000442.11 ESRRA-201 others others
## 4 ENST00000001008.6 FKBP4-201 others others
## 5 ENST00000002165.11 FUCA2-201 others others
## 6 ENST00000003100.13 CYP51A1-201 others,only I others
## 7 ENST00000004103.8 TMEM176A-201 others others
## 8 ENST00000005257.7 RALA-201 others others
## 9 ENST00000005260.9 BAIAP2L1-201 others others
## 10 ENST00000005386.8 RPAP3-201 others others
## # ℹ 5,287 more rows
## # ℹ 1 more variable: intensity_group <chr>
# Number of transcripts in each intensity group.
count(sampcomp_results_intensity_group, intensity_group)
## # A tibble: 4 × 2
## intensity_group n
## <chr> <int>
## 1 both up and down 6
## 2 down 2
## 3 others 5210
## 4 up 79
# Attach the per-transcript intensity group to the transcript-level table.
# The join keys are spelled out so the join cannot silently start matching on
# any other column the two tables might come to share. A full join keeps
# transcripts present in only one table; their missing columns are NA.
espresso_deseq2_genetype2_isDET_intensitygroup <-
  espresso_deseq2_genetype2_isDET |>
  full_join(
    sampcomp_results_intensity_group,
    by = join_by(transcript_id, transcript_name)
  )
## Joining with `by = join_by(transcript_id, transcript_name)`
# Preview the joined transcript-level table.
espresso_deseq2_genetype2_isDET_intensitygroup
## # A tibble: 36,717 × 32
## transcript_id transcript_type transcript_name gene_id gene_type gene_name
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ENST00000498442.1 retained_intron CRBN-212 ENSG00… protein_… CRBN
## 2 ENST00000459840.5 retained_intron CRBN-205 ENSG00… protein_… CRBN
## 3 ENST00000231948.9 protein_coding CRBN-201 ENSG00… protein_… CRBN
## 4 ENST00000432408.6 protein_coding CRBN-203 ENSG00… protein_… CRBN
## 5 ENST00000339437.… protein_coding TRNT1-203 ENSG00… protein_… TRNT1
## 6 ENST00000488263.5 retained_intron CRBN-209 ENSG00… protein_… CRBN
## 7 ENST00000420393.5 protein_coding TRNT1-207 ENSG00… protein_… TRNT1
## 8 ENST00000698415.1 retained_intron TRNT1-230 ENSG00… protein_… TRNT1
## 9 ENST00000450014.1 protein_coding CRBN-204 ENSG00… protein_… CRBN
## 10 ENST00000698416.1 retained_intron TRNT1-231 ENSG00… protein_… TRNT1
## # ℹ 36,707 more rows
## # ℹ 26 more variables: siMETTL2A_baseMean <dbl>,
## # siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## # siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## # siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
# Distribution of siMETTL2A log2 fold change per intensity group; the
# horizontal line marks a fold change of zero.
ggplot(
  espresso_deseq2_genetype2_isDET_intensitygroup,
  aes(x = intensity_group, y = siMETTL2A_log2FoldChange)
) +
  # stat_ecdf() +
  geom_boxplot() +
  geom_hline(yintercept = 0)
## Warning: Removed 340 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Correlation
# Per-position median current-intensity difference for each comparison
# (suffix _G and suffix _I). In these columns c1 is the control sample and c2
# the knock-down sample, so each difference is knock-down minus control.
sampcomp_results_intensity <-
  sampcomp_results_joined |>
  select(
    transcript_id:ref_kmer,
    intensity_up, intensity_down,
    contains('_median_intensity_'), contains('_mean_intensity_')
  ) |> # c1: ctrl, c2: KD
  mutate(
    median_intensity_diff_G = c2_median_intensity_G - c1_median_intensity_G,
    # Fixed: the control term must come from the same (_I) comparison; it
    # previously subtracted c1_median_intensity_G.
    median_intensity_diff_I = c2_median_intensity_I - c1_median_intensity_I
  )
sampcomp_results_intensity
## # A tibble: 5,884,004 × 16
## transcript_id transcript_name position ref_kmer intensity_up intensity_down
## <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 ENST0000026492… RAD18-201 1464 TCACA others others
## 2 ENST0000026492… RAD18-201 1465 CACAT others others
## 3 ENST0000026492… RAD18-201 1466 ACATA others others
## 4 ENST0000026492… RAD18-201 1467 CATAA others others
## 5 ENST0000026492… RAD18-201 1468 ATAAA others others
## 6 ENST0000026492… RAD18-201 1473 AACGA others others
## 7 ENST0000026492… RAD18-201 1475 CGATC others others
## 8 ENST0000026492… RAD18-201 1486 ACACA others others
## 9 ENST0000026492… RAD18-201 1501 CAAGA others others
## 10 ENST0000026492… RAD18-201 1502 AAGAC others others
## # ℹ 5,883,994 more rows
## # ℹ 10 more variables: c1_median_intensity_G <dbl>,
## # c2_median_intensity_G <dbl>, c1_median_intensity_I <dbl>,
## # c2_median_intensity_I <dbl>, c1_mean_intensity_G <dbl>,
## # c2_mean_intensity_G <dbl>, c1_mean_intensity_I <dbl>,
## # c2_mean_intensity_I <dbl>, median_intensity_diff_G <dbl>,
## # median_intensity_diff_I <dbl>
# Attach the per-position intensity differences to the transcript-level table.
# Each transcript row is repeated once per matching position, and a full join
# keeps transcripts present in only one table (missing columns become NA).
# The join keys are spelled out so the join cannot silently start matching on
# any other column the two tables might come to share.
espresso_deseq2_genetype2_isDET_intensitydiff <-
  espresso_deseq2_genetype2_isDET |>
  full_join(
    sampcomp_results_intensity,
    by = join_by(transcript_id, transcript_name)
  )
## Joining with `by = join_by(transcript_id, transcript_name)`
# Preview the joined transcript-by-position table.
espresso_deseq2_genetype2_isDET_intensitydiff
## # A tibble: 5,915,424 × 43
## transcript_id transcript_type transcript_name gene_id gene_type gene_name
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ENST00000498442.1 retained_intron CRBN-212 ENSG00… protein_… CRBN
## 2 ENST00000459840.5 retained_intron CRBN-205 ENSG00… protein_… CRBN
## 3 ENST00000231948.9 protein_coding CRBN-201 ENSG00… protein_… CRBN
## 4 ENST00000432408.6 protein_coding CRBN-203 ENSG00… protein_… CRBN
## 5 ENST00000339437.… protein_coding TRNT1-203 ENSG00… protein_… TRNT1
## 6 ENST00000488263.5 retained_intron CRBN-209 ENSG00… protein_… CRBN
## 7 ENST00000420393.5 protein_coding TRNT1-207 ENSG00… protein_… TRNT1
## 8 ENST00000698415.1 retained_intron TRNT1-230 ENSG00… protein_… TRNT1
## 9 ENST00000450014.1 protein_coding CRBN-204 ENSG00… protein_… CRBN
## 10 ENST00000698416.1 retained_intron TRNT1-231 ENSG00… protein_… TRNT1
## # ℹ 5,915,414 more rows
## # ℹ 37 more variables: siMETTL2A_baseMean <dbl>,
## # siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## # siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## # siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
# Hex-binned density of the G-comparison log2 fold change against the median
# intensity difference, with a linear fit and faint zero reference lines.
# Bin counts are coloured on a log10 scale.
ggplot(
  espresso_deseq2_genetype2_isDET_intensitydiff,
  aes(x = median_intensity_diff_G, y = siMETTL2A_G_log2FoldChange)
) +
  geom_hex(bins = 100) +
  stat_smooth(method = 'lm') +
  geom_vline(xintercept = 0, alpha = 0.2) +
  geom_hline(yintercept = 0, alpha = 0.2) +
  scale_fill_viridis_c(trans = 'log10')
## Warning: Removed 64846 rows containing non-finite outside the scale range
## (`stat_binhex()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 64846 rows containing non-finite outside the scale range
## (`stat_smooth()`).

# 2D histogram with correlation tests for the G comparison.
# plot_2dhistogram_withcortest() is a project helper defined elsewhere.
plot_2dhistogram_withcortest(
  espresso_deseq2_genetype2_isDET_intensitydiff,
  x = median_intensity_diff_G,
  y = siMETTL2A_G_log2FoldChange,
  n_bins = 100
)
## Warning in cor.test.default(x = mf[[1L]], y = mf[[2L]], ...): Cannot compute
## exact p-value with ties
## # A tibble: 2 × 9
## estimate statistic p.value method method_short alternative parameter conf.low
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <int> <dbl>
## 1 -0.00625 3.36e19 1.47e-51 Spear… Spearman two.sided NA NA
## 2 -0.00318 -7.69e 0 1.48e-14 Pears… Pearson two.sided 5850576 -0.00399
## # ℹ 1 more variable: conf.high <dbl>
## [1] 5850578
## # A tibble: 2 × 1
## msg
## <chr>
## 1 Spearman: r = -0.01, p < 2.2e-16, n = 5850578
## 2 Pearson: r = 0, p = 1.48e-14, n = 5850578

# 2D histogram with correlation tests for the I comparison.
# plot_2dhistogram_withcortest() is a project helper defined elsewhere.
plot_2dhistogram_withcortest(
  espresso_deseq2_genetype2_isDET_intensitydiff,
  x = median_intensity_diff_I,
  y = siMETTL2A_I_log2FoldChange,
  n_bins = 100
)
## Warning in cor.test.default(x = mf[[1L]], y = mf[[2L]], ...): Cannot compute
## exact p-value with ties
## # A tibble: 2 × 9
## estimate statistic p.value method method_short alternative parameter conf.low
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <int> <dbl>
## 1 -0.0109 5.61e18 1.93e-85 Spear… Spearman two.sided NA NA
## 2 -0.00547 -9.81e 0 1.00e-22 Pears… Pearson two.sided 3217023 -0.00656
## # ℹ 1 more variable: conf.high <dbl>
## [1] 3217025
## # A tibble: 2 × 1
## msg
## <chr>
## 1 Spearman: r = -0.01, p < 2.2e-16, n = 3217025
## 2 Pearson: r = -0.01, p < 2.2e-16, n = 3217025

# Hex-binned density plot of one column against another.
#
# df: data frame holding both columns.
# .x, .y: unquoted column names mapped to the x and y axes.
#
# Returns a ggplot object. The axes are limited to [-20, 20] (x) and
# [-10, 10] (y), faint reference lines are drawn at zero on both axes, and
# bin counts are coloured on a log10 scale.
plot_intensity_coverage_change_correlation <- function(df, .x, .y) {
  ggplot(df, aes(x = {{ .x }}, y = {{ .y }})) +
    geom_hex(bins = 100) +
    # stat_smooth(method = 'lm') +
    geom_vline(xintercept = 0, alpha = 0.2) +
    geom_hline(yintercept = 0, alpha = 0.2) +
    scale_x_continuous(limits = c(-20, 20)) +
    scale_y_continuous(limits = c(-10, 10)) +
    scale_fill_viridis_c(trans = 'log10')
}
# Build and save the intensity-vs-fold-change density plot for the G
# comparison. ggsave_pdf() and figdir are defined elsewhere in the project; no
# file name is passed here, so the helper presumably derives one itself --
# TODO confirm.
currentintensity_coverage_correlation_G <-
  plot_intensity_coverage_change_correlation(
    espresso_deseq2_genetype2_isDET_intensitydiff,
    .x = median_intensity_diff_G,
    .y = siMETTL2A_G_log2FoldChange
  )
ggsave_pdf(
  currentintensity_coverage_correlation_G,
  width = 5, height = 6, outdir = figdir
)
## Warning: Removed 66072 rows containing non-finite outside the scale range
## (`stat_binhex()`).
## Warning: Removed 66072 rows containing non-finite outside the scale range
## (`stat_binhex()`).

# Build and save the intensity-vs-fold-change density plot for the I
# comparison. ggsave_pdf() and figdir are defined elsewhere in the project; no
# file name is passed here, so the helper presumably derives one itself --
# TODO confirm.
currentintensity_coverage_correlation_I <-
  plot_intensity_coverage_change_correlation(
    espresso_deseq2_genetype2_isDET_intensitydiff,
    .x = median_intensity_diff_I,
    .y = siMETTL2A_I_log2FoldChange
  )
ggsave_pdf(
  currentintensity_coverage_correlation_I,
  width = 5, height = 6, outdir = figdir
)
## Warning: Removed 2698411 rows containing non-finite outside the scale range
## (`stat_binhex()`).
## Warning: Removed 2698411 rows containing non-finite outside the scale range
## (`stat_binhex()`).

# Candidate sites: positions labelled 'common' for increased intensity whose
# middle base is C.
m3C_sites <- filter(
  sampcomp_results_joined,
  intensity_up == 'common',
  middle_base == 'C'
)
m3C_sites
## # A tibble: 489 × 67
## transcript_id transcript_name position ref_kmer GMM_logit_pvalue_G
## <chr> <chr> <dbl> <chr> <dbl>
## 1 ENST00000429711.7 RPL32-204 422 GCCCA 1
## 2 ENST00000647248.2 RPL35A-211 380 ACCCC 1
## 3 ENST00000647248.2 RPL35A-211 381 CCCCT 1
## 4 ENST00000389680.2 MT-RNR1-201 57 CCCCG 1
## 5 ENST00000389680.2 MT-RNR1-201 75 ACCCT 0.777
## 6 ENST00000389680.2 MT-RNR1-201 93 ATCAA 1
## 7 ENST00000389680.2 MT-RNR1-201 148 GCCAC 1
## 8 ENST00000389680.2 MT-RNR1-201 153 ACCCC 1
## 9 ENST00000389680.2 MT-RNR1-201 154 CCCCC 1
## 10 ENST00000389680.2 MT-RNR1-201 155 CCCCA 1
## # ℹ 479 more rows
## # ℹ 62 more variables: KS_dwell_pvalue_G <dbl>, KS_intensity_pvalue_G <dbl>,
## # GMM_cov_type_G <chr>, GMM_n_clust_G <dbl>, cluster_counts_G <chr>,
## # Logit_LOR_G <chr>, c1_mean_intensity_G <dbl>, c2_mean_intensity_G <dbl>,
## # c1_median_intensity_G <dbl>, c2_median_intensity_G <dbl>,
## # c1_sd_intensity_G <dbl>, c2_sd_intensity_G <dbl>, c1_mean_dwell_G <dbl>,
## # c2_mean_dwell_G <dbl>, c1_median_dwell_G <dbl>, c2_median_dwell_G <dbl>, …
# All positions of every transcript that carries at least one candidate site.
# The join keys are spelled out so the join cannot silently start matching on
# any other column the two tables might come to share.
m3C_RNAs <-
  m3C_sites |>
  distinct(transcript_id, transcript_name) |>
  left_join(
    sampcomp_results_joined,
    by = join_by(transcript_id, transcript_name)
  )
## Joining with `by = join_by(transcript_id, transcript_name)`
# Preview all positions on the selected transcripts.
m3C_RNAs
## # A tibble: 60,206 × 67
## transcript_id transcript_name position ref_kmer GMM_logit_pvalue_G
## <chr> <chr> <dbl> <chr> <dbl>
## 1 ENST00000429711.7 RPL32-204 30 TCCTC NA
## 2 ENST00000429711.7 RPL32-204 31 CCTCG 1
## 3 ENST00000429711.7 RPL32-204 32 CTCGG 1
## 4 ENST00000429711.7 RPL32-204 33 TCGGC 1
## 5 ENST00000429711.7 RPL32-204 34 CGGCG 1
## 6 ENST00000429711.7 RPL32-204 35 GGCGC 1
## 7 ENST00000429711.7 RPL32-204 36 GCGCT 1
## 8 ENST00000429711.7 RPL32-204 37 CGCTG 1
## 9 ENST00000429711.7 RPL32-204 38 GCTGC 1
## 10 ENST00000429711.7 RPL32-204 39 CTGCC 1
## # ℹ 60,196 more rows
## # ℹ 62 more variables: KS_dwell_pvalue_G <dbl>, KS_intensity_pvalue_G <dbl>,
## # GMM_cov_type_G <chr>, GMM_n_clust_G <dbl>, cluster_counts_G <chr>,
## # Logit_LOR_G <chr>, c1_mean_intensity_G <dbl>, c2_mean_intensity_G <dbl>,
## # c1_median_intensity_G <dbl>, c2_median_intensity_G <dbl>,
## # c1_sd_intensity_G <dbl>, c2_sd_intensity_G <dbl>, c1_mean_dwell_G <dbl>,
## # c2_mean_dwell_G <dbl>, c1_median_dwell_G <dbl>, c2_median_dwell_G <dbl>, …
# Count positions per (intensity_up, intensity_down) combination and label
# each combination: 'up' when the up call is 'common', otherwise 'down' when
# the down call is 'common', otherwise 'others'.
m3C_RNAs_allsites_intensity_group <-
  count(m3C_RNAs, intensity_up, intensity_down) |>
  mutate(
    intensity_group = case_when(
      intensity_up == 'common' ~ 'up',
      intensity_down == 'common' ~ 'down',
      .default = 'others'
    )
  )
m3C_RNAs_allsites_intensity_group
## # A tibble: 9 × 4
## intensity_up intensity_down n intensity_group
## <chr> <chr> <int> <chr>
## 1 common others 588 up
## 2 only G only I 5 others
## 3 only G others 1757 others
## 4 only I only G 1 others
## 5 only I others 322 others
## 6 others common 19 down
## 7 others only G 39 others
## 8 others only I 166 others
## 9 others others 57309 others
# Add cumulative slice boundaries for a stacked or polar chart.
#
# df: data frame with a numeric `percentage` column (values on a 0-100 scale).
#
# Returns df with two extra columns: `ymax`, the running total of
# percentage / 100, and `ymin`, the previous row's `ymax` (0 for the first
# row).
add_yrange <- function(df) {
  with_bounds <- mutate(df, ymax = cumsum(percentage / 100))
  upper <- with_bounds$ymax
  # Each slice starts where the previous one ended.
  with_bounds$ymin <- c(0, head(upper, n = -1))
  with_bounds
}
# Draw a donut chart from a table of percentages.
#
# df: data frame with a `percentage` column; one slice is drawn per row.
# var: unquoted column used as the text label of each slice.
# col: unquoted column mapped to the fill colour.
# color_values: colours handed to scale_fill_manual().
#
# Returns a ggplot object.
donutplot <- function(df, var, col, color_values) {
  slices <- add_yrange(df)
  # The ring is a stack of rectangles between x = 3 and x = 4 that is bent
  # into a circle by the polar transform on y.
  ring <- ggplot(
    slices,
    aes(xmin = 3, xmax = 4, ymin = ymin, ymax = ymax, fill = {{ col }})
  ) +
    geom_rect() +
    coord_polar(theta = 'y')
  ring +
    # Labels sit at the angular midpoint of each slice, at x = 1.
    ggrepel::geom_text_repel(
      aes(label = {{ var }}, y = (ymin + ymax) / 2),
      x = 1
    ) +
    scale_fill_manual(values = color_values) +
    xlim(-1, 4) +
    theme_void()
}
# Donut chart of the share of positions in each intensity group.
# The per-(intensity_up, intensity_down) counts are first collapsed to one row
# per intensity_group, so every group is drawn as a single slice with a single
# label. Passing the unaggregated rows gave one same-text label per row, most
# of which ggrepel then dropped as overlapping.
percentage_intensity_up_sites_in_m3CRNAs <-
  m3C_RNAs_allsites_intensity_group |>
  summarise(n = sum(n), .by = intensity_group) |>
  arrange(intensity_group) |>
  mutate(percentage = 100 * n / sum(n)) |>
  donutplot(
    var = intensity_group, col = intensity_group,
    # Unnamed colours are matched to the groups in sorted order
    # (down, others, up).
    color_values = c('#0000aa', '#999999', '#aa0000')
  )
# ggsave_pdf() and figdir are defined elsewhere in the project.
percentage_intensity_up_sites_in_m3CRNAs |>
  ggsave_pdf(width = 6, height = 4, outdir = figdir)
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
